{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h2 align='center' style='color:blue'>Finding best model and hyper parameters for sklearn digits dataset classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "digits = datasets.load_digits()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import svm\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_params = {\n",
    "    'svm': {\n",
    "        'model': svm.SVC(gamma='auto'),\n",
    "        'params' : {\n",
    "            'C': [1,10,20],\n",
    "            'kernel': ['rbf','linear']\n",
    "        }  \n",
    "    },\n",
    "    'random_forest': {\n",
    "        'model': RandomForestClassifier(),\n",
    "        'params' : {\n",
    "            'n_estimators': [1,5,10]\n",
    "        }\n",
    "    },\n",
    "    'logistic_regression' : {\n",
    "        'model': LogisticRegression(solver='liblinear',multi_class='auto'),\n",
    "        'params': {\n",
    "            'C': [1,5,10]\n",
    "        }\n",
    "    },\n",
    "    'naive_bayes_gaussian': {\n",
    "        'model': GaussianNB(),\n",
    "        'params': {}\n",
    "    },\n",
    "    'naive_bayes_multinomial': {\n",
    "        'model': MultinomialNB(),\n",
    "        'params': {}\n",
    "    },\n",
    "    'decision_tree': {\n",
    "        'model': DecisionTreeClassifier(),\n",
    "        'params': {\n",
    "            'criterion': ['gini','entropy'],\n",
    "            \n",
    "        }\n",
    "    }     \n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>best_score</th>\n",
       "      <th>best_params</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>svm</td>\n",
       "      <td>0.949360</td>\n",
       "      <td>{'C': 1, 'kernel': 'linear'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>random_forest</td>\n",
       "      <td>0.899833</td>\n",
       "      <td>{'n_estimators': 10}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>logistic_regression</td>\n",
       "      <td>0.920979</td>\n",
       "      <td>{'C': 1}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>naive_bayes_gaussian</td>\n",
       "      <td>0.806344</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>naive_bayes_multinomial</td>\n",
       "      <td>0.871452</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>decision_tree</td>\n",
       "      <td>0.817474</td>\n",
       "      <td>{'criterion': 'entropy'}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     model  best_score                   best_params\n",
       "0                      svm    0.949360  {'C': 1, 'kernel': 'linear'}\n",
       "1            random_forest    0.899833          {'n_estimators': 10}\n",
       "2      logistic_regression    0.920979                      {'C': 1}\n",
       "3     naive_bayes_gaussian    0.806344                            {}\n",
       "4  naive_bayes_multinomial    0.871452                            {}\n",
       "5            decision_tree    0.817474      {'criterion': 'entropy'}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "import pandas as pd\n",
    "scores = []\n",
    "\n",
    "for model_name, mp in model_params.items():\n",
    "    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)\n",
    "    clf.fit(digits.data, digits.target)\n",
    "    scores.append({\n",
    "        'model': model_name,\n",
    "        'best_score': clf.best_score_,\n",
    "        'best_params': clf.best_params_\n",
    "    })\n",
    "    \n",
    "df = pd.DataFrame(scores,columns=['model','best_score','best_params'])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**For me the winner is svm (C=1, kernel=linear) with 94.93% score. It could be different for you as I have limited my parameters to be certain values only**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
